# Credit - UT Austin Computer Science Department
# Function to create a combo boxplot and histogram for continuous
# (I/R --> int64 and float64) variables.
def boxplot_histogram(feature, figsize=(5, 3.5), bins=None):
    """Draw a horizontal boxplot stacked above a histogram of ``feature``.

    The two panels share the x-axis. The histogram panel is annotated with
    vertical lines for the mean (green, dashed), the median (black, solid)
    and the mode (red, dashed).

    Args:
        feature: The values to plot. Must provide a ``.mode()`` method whose
            result can be indexed with ``[0]`` (presumably a pandas Series --
            confirm against callers).
        figsize: ``(width, height)`` forwarded to ``plt.subplots``.
        bins: Forwarded to ``sns.distplot`` when truthy. When falsy
            (``None`` or ``0``) the histogram uses seaborn's default binning
            and a normal-distribution fit is overlaid instead.
    """
    sns.set(font_scale=1)

    # Two stacked panels sharing x: a short boxplot strip on top (25% of the
    # height) and the histogram underneath (75%).
    _fig, (ax_box2, ax_hist2) = plt.subplots(
        nrows=2,
        sharex=True,
        gridspec_kw={"height_ratios": (.25, .75)},
        figsize=figsize,
    )

    # showmeans=True marks the mean value on the boxplot.
    sns.boxplot(feature, ax=ax_box2, orient="h", showmeans=True, color='red')

    if bins:
        # Fixed: this branch previously passed ``kde=F``; ``F`` is not
        # defined, so supplying ``bins`` raised NameError.
        sns.distplot(feature, kde=False, ax=ax_hist2, bins=bins)
    else:
        sns.distplot(feature, kde=False, ax=ax_hist2, fit=norm)

    # Central-tendency markers, all drawn explicitly on the histogram axes
    # rather than relying on pyplot's implicit "current axes".
    ax_hist2.axvline(np.mean(feature), color='g', linestyle='--')       # mean
    ax_hist2.axvline(np.median(feature), color='black', linestyle='-')  # median
    ax_hist2.axvline(feature.mode()[0], color='r', linestyle='dashed',
                     linewidth=1)                                        # mode
%%time # captures time it takes to run this block of code # Choose the type of classifier. rf_tuned = RandomForestClassifier(class_weight={0:0.35,1:0.65},random_state=1) parameters = { 'max_depth': list(np.arange(3,10,1)), 'max_features': np.arange(0.6,1.1,0.1), 'max_samples': np.arange(0.7,1.1,0.1), 'min_samples_split': np.arange(2, 20, 5), 'n_estimators': np.arange(30,160,20), 'min_impurity_decrease': [0.0001,0.001,0.01,0.1] } # Type of scoring used to compare parameter combinations scorer = metrics.make_scorer(metrics.recall_score) # Run the grid search grid_obj = GridSearchCV(rf_tuned, parameters, scoring=scorer,cv=5,n_jobs=-1) grid_obj = grid_obj.fit(X_train, y_train) # Set the clf to the best combination of parameters rf_tuned = grid_obj.best_estimator_ # Fit the best algorithm to the data. rf_tuned.fit(X_train, y_train) rf_tuned.fit(X_train, y_train)